{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from glob import glob\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder that holds the spreadsheets released for the competition. The location is\n",
    "# machine-specific, so it is kept in one constant instead of being repeated per file.\n",
    "DATA_DIR = r'E:\\23研赛\\建模2023\\建模2023\\2023年中国研究生数学建模竞赛赛题\\E题\\E题\\数据\\竞赛发布数据'\n",
    "# Table 3 is a single workbook that is read twice, once per sheet.\n",
    "TABLE3_PATH = os.path.join(DATA_DIR, '表3-患者影像信息血肿及水肿的形状及灰度分布.xlsx')\n",
    "\n",
    "table1 = pd.read_excel(os.path.join(DATA_DIR, '表1-患者列表及临床信息.xlsx'))\n",
    "table2 = pd.read_excel(os.path.join(DATA_DIR, '表2-患者影像信息血肿及水肿的体积及位置.xlsx'))\n",
    "table3_Hemo = pd.read_excel(TABLE3_PATH, sheet_name='Hemo')\n",
    "table3_ED = pd.read_excel(TABLE3_PATH, sheet_name='ED')\n",
    "# skiprows=1: the first spreadsheet row is skipped, so the header is taken from the second row.\n",
    "table4 = pd.read_excel(os.path.join(DATA_DIR, '表4-答案文件更新后.xlsx'), skiprows=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Discard the row labelled 0 (NOTE(review): presumably a leftover sub-header row - confirm\n",
    "# against the workbook). The result is re-assigned and a missing label is tolerated, so\n",
    "# running this cell a second time no longer raises KeyError.\n",
    "table4 = table4.drop(index=0, errors='ignore')\n",
    "# Rename only the first column to 'ID'; every other column keeps the name it was loaded with.\n",
    "table4.columns = ['ID'] + table4.columns.tolist()[1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Call the first column of table1 'ID'; the remaining column labels are left untouched.\n",
    "table1.columns = ['ID', *table1.columns[1:]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the leading 24 columns of table2 (positions 0 through 23); anything to the\n",
    "# right of them is discarded.\n",
    "table2 = table2.iloc[:, 0:24]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((160, 23), (160, 24), (545, 33), (576, 33), (160, 10))"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "table1.shape, table2.shape, table3_Hemo.shape, table3_ED.shape, table4.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(table2['首次检查流水号'] == table1['入院首次影像检查流水号']).any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>首次检查流水号</th>\n",
       "      <th>HM_volume</th>\n",
       "      <th>HM_ACA_R_Ratio</th>\n",
       "      <th>HM_MCA_R_Ratio</th>\n",
       "      <th>HM_PCA_R_Ratio</th>\n",
       "      <th>HM_Pons_Medulla_R_Ratio</th>\n",
       "      <th>HM_Cerebellum_R_Ratio</th>\n",
       "      <th>HM_ACA_L_Ratio</th>\n",
       "      <th>HM_MCA_L_Ratio</th>\n",
       "      <th>...</th>\n",
       "      <th>ED_ACA_R_Ratio</th>\n",
       "      <th>ED_MCA_R_Ratio</th>\n",
       "      <th>ED_PCA_R_Ratio</th>\n",
       "      <th>ED_Pons_Medulla_R_Ratio</th>\n",
       "      <th>ED_Cerebellum_R_Ratio</th>\n",
       "      <th>ED_ACA_L_Ratio</th>\n",
       "      <th>ED_MCA_L_Ratio</th>\n",
       "      <th>ED_PCA_L_Ratio</th>\n",
       "      <th>ED_Pons_Medulla_L_Ratio</th>\n",
       "      <th>ED_Cerebellum_L_Ratio</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>sub001</td>\n",
       "      <td>20161212002136</td>\n",
       "      <td>69714</td>\n",
       "      <td>0.000258</td>\n",
       "      <td>0.877112</td>\n",
       "      <td>0.120148</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.108833</td>\n",
       "      <td>0.726241</td>\n",
       "      <td>0.136511</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>sub002</td>\n",
       "      <td>20160406002131</td>\n",
       "      <td>47500</td>\n",
       "      <td>0.496000</td>\n",
       "      <td>0.180779</td>\n",
       "      <td>0.302316</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.003032</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.198121</td>\n",
       "      <td>0.544249</td>\n",
       "      <td>0.246876</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.002083</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>sub003</td>\n",
       "      <td>20160413000006</td>\n",
       "      <td>86396</td>\n",
       "      <td>0.053718</td>\n",
       "      <td>0.829078</td>\n",
       "      <td>0.099681</td>\n",
       "      <td>0.000197</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.162533</td>\n",
       "      <td>0.748352</td>\n",
       "      <td>0.075013</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sub004</td>\n",
       "      <td>20161215001667</td>\n",
       "      <td>45498</td>\n",
       "      <td>0.002242</td>\n",
       "      <td>0.002198</td>\n",
       "      <td>0.029100</td>\n",
       "      <td>0.000374</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.163458</td>\n",
       "      <td>0.313618</td>\n",
       "      <td>...</td>\n",
       "      <td>0.020864</td>\n",
       "      <td>0.002298</td>\n",
       "      <td>0.012483</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.444685</td>\n",
       "      <td>0.433689</td>\n",
       "      <td>0.081880</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>sub005</td>\n",
       "      <td>20161222000978</td>\n",
       "      <td>14832</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.001146</td>\n",
       "      <td>0.000337</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000202</td>\n",
       "      <td>0.413363</td>\n",
       "      <td>...</td>\n",
       "      <td>0.044501</td>\n",
       "      <td>0.418341</td>\n",
       "      <td>0.011690</td>\n",
       "      <td>0.003735</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.080288</td>\n",
       "      <td>0.341260</td>\n",
       "      <td>0.078093</td>\n",
       "      <td>0.001435</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>155</th>\n",
       "      <td>sub156</td>\n",
       "      <td>20200306000927</td>\n",
       "      <td>32559</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.033754</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.038545</td>\n",
       "      <td>0.922049</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.370986</td>\n",
       "      <td>0.629014</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>156</th>\n",
       "      <td>sub157</td>\n",
       "      <td>20201009003102</td>\n",
       "      <td>18150</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.997410</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>157</th>\n",
       "      <td>sub158</td>\n",
       "      <td>20200410001952</td>\n",
       "      <td>27969</td>\n",
       "      <td>0.002682</td>\n",
       "      <td>0.937359</td>\n",
       "      <td>0.059959</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.009583</td>\n",
       "      <td>0.808725</td>\n",
       "      <td>0.160686</td>\n",
       "      <td>0.003622</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>158</th>\n",
       "      <td>sub159</td>\n",
       "      <td>20200218000582</td>\n",
       "      <td>53154</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.004572</td>\n",
       "      <td>0.857678</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.147678</td>\n",
       "      <td>0.711019</td>\n",
       "      <td>0.124740</td>\n",
       "      <td>0.008247</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>159</th>\n",
       "      <td>sub160</td>\n",
       "      <td>20200821002584</td>\n",
       "      <td>49019</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.907505</td>\n",
       "      <td>0.092372</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.231984</td>\n",
       "      <td>0.679948</td>\n",
       "      <td>0.066373</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>160 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         ID         首次检查流水号  HM_volume  HM_ACA_R_Ratio  HM_MCA_R_Ratio  \\\n",
       "0    sub001  20161212002136      69714        0.000258        0.877112   \n",
       "1    sub002  20160406002131      47500        0.496000        0.180779   \n",
       "2    sub003  20160413000006      86396        0.053718        0.829078   \n",
       "3    sub004  20161215001667      45498        0.002242        0.002198   \n",
       "4    sub005  20161222000978      14832        0.000000        0.000000   \n",
       "..      ...             ...        ...             ...             ...   \n",
       "155  sub156  20200306000927      32559        0.000000        0.033754   \n",
       "156  sub157  20201009003102      18150        0.000000        0.000000   \n",
       "157  sub158  20200410001952      27969        0.002682        0.937359   \n",
       "158  sub159  20200218000582      53154        0.000000        0.000000   \n",
       "159  sub160  20200821002584      49019        0.000000        0.907505   \n",
       "\n",
       "     HM_PCA_R_Ratio  HM_Pons_Medulla_R_Ratio  HM_Cerebellum_R_Ratio  \\\n",
       "0          0.120148                 0.000000                    0.0   \n",
       "1          0.302316                 0.000000                    0.0   \n",
       "2          0.099681                 0.000197                    0.0   \n",
       "3          0.029100                 0.000374                    0.0   \n",
       "4          0.001146                 0.000337                    0.0   \n",
       "..              ...                      ...                    ...   \n",
       "155        0.000000                 0.000000                    0.0   \n",
       "156        0.000000                 0.000000                    0.0   \n",
       "157        0.059959                 0.000000                    0.0   \n",
       "158        0.000000                 0.000000                    0.0   \n",
       "159        0.092372                 0.000000                    0.0   \n",
       "\n",
       "     HM_ACA_L_Ratio  HM_MCA_L_Ratio  ...  ED_ACA_R_Ratio  ED_MCA_R_Ratio  \\\n",
       "0          0.000000        0.000000  ...        0.108833        0.726241   \n",
       "1          0.003032        0.000000  ...        0.198121        0.544249   \n",
       "2          0.000000        0.000000  ...        0.162533        0.748352   \n",
       "3          0.163458        0.313618  ...        0.020864        0.002298   \n",
       "4          0.000202        0.413363  ...        0.044501        0.418341   \n",
       "..              ...             ...  ...             ...             ...   \n",
       "155        0.038545        0.922049  ...        0.000000        0.000000   \n",
       "156        0.000000        0.997410  ...        0.000000        0.000000   \n",
       "157        0.000000        0.000000  ...        0.009583        0.808725   \n",
       "158        0.004572        0.857678  ...        0.000000        0.000000   \n",
       "159        0.000000        0.000000  ...        0.231984        0.679948   \n",
       "\n",
       "     ED_PCA_R_Ratio  ED_Pons_Medulla_R_Ratio  ED_Cerebellum_R_Ratio  \\\n",
       "0          0.136511                 0.000000                    0.0   \n",
       "1          0.246876                 0.000000                    0.0   \n",
       "2          0.075013                 0.000000                    0.0   \n",
       "3          0.012483                 0.000000                    0.0   \n",
       "4          0.011690                 0.003735                    0.0   \n",
       "..              ...                      ...                    ...   \n",
       "155        0.000000                 0.000000                    0.0   \n",
       "156        0.000000                 0.000000                    0.0   \n",
       "157        0.160686                 0.003622                    0.0   \n",
       "158        0.000000                 0.000000                    0.0   \n",
       "159        0.066373                 0.000000                    0.0   \n",
       "\n",
       "     ED_ACA_L_Ratio  ED_MCA_L_Ratio  ED_PCA_L_Ratio  ED_Pons_Medulla_L_Ratio  \\\n",
       "0          0.000000        0.000000        0.000000                 0.000000   \n",
       "1          0.000000        0.000000        0.002083                 0.000000   \n",
       "2          0.000000        0.000000        0.000000                 0.000000   \n",
       "3          0.444685        0.433689        0.081880                 0.000000   \n",
       "4          0.080288        0.341260        0.078093                 0.001435   \n",
       "..              ...             ...             ...                      ...   \n",
       "155        0.370986        0.629014        0.000000                 0.000000   \n",
       "156        0.000000        1.000000        0.000000                 0.000000   \n",
       "157        0.000000        0.000000        0.000000                 0.000000   \n",
       "158        0.147678        0.711019        0.124740                 0.008247   \n",
       "159        0.000000        0.000000        0.000000                 0.000000   \n",
       "\n",
       "     ED_Cerebellum_L_Ratio  \n",
       "0                      0.0  \n",
       "1                      0.0  \n",
       "2                      0.0  \n",
       "3                      0.0  \n",
       "4                      0.0  \n",
       "..                     ...  \n",
       "155                    0.0  \n",
       "156                    0.0  \n",
       "157                    0.0  \n",
       "158                    0.0  \n",
       "159                    0.0  \n",
       "\n",
       "[160 rows x 24 columns]"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "table2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(160, 45)\n"
     ]
    }
   ],
   "source": [
    "concat_data = pd.concat([table1,table2.iloc[:,2:]], axis=1)\n",
    "print(concat_data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(160, 78)\n"
     ]
    }
   ],
   "source": [
    "concat_data = pd.merge(concat_data, table3_Hemo, left_on = '入院首次影像检查流水号', right_on='流水号', how='inner')\n",
    "print(concat_data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(160, 111)\n"
     ]
    }
   ],
   "source": [
    "concat_data = pd.merge(concat_data, table3_ED, left_on = '入院首次影像检查流水号', right_on='流水号', how='inner')\n",
    "print(concat_data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
    "# '血压' holds a reading as text with a '/' separator: the part before the slash becomes\n",
    "# '高压' and the part after it becomes '低压', both converted to integers.\n",
    "concat_data['高压'] = [int(reading.split('/')[0]) for reading in concat_data['血压']]\n",
    "concat_data['低压'] = [int(reading.split('/')[1]) for reading in concat_data['血压']]\n",
    "# Order the rows by patient ID; the existing index labels travel with their rows.\n",
    "concat_data = concat_data.sort_values(by='ID')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the raw blood-pressure text and the suffixed '流水号' / '备注' columns that came\n",
    "# in through the two table-3 merges.\n",
    "concat_data = concat_data.drop(columns=['血压', '流水号_x', '流水号_y', '备注_x', '备注_y'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the merged table as an Excel workbook in the working directory; the DataFrame\n",
    "# index is not written.\n",
    "concat_data.to_excel(excel_writer='q1_concat1.xlsx', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>90天mRS</th>\n",
       "      <th>数据集划分</th>\n",
       "      <th>入院首次影像检查流水号</th>\n",
       "      <th>年龄</th>\n",
       "      <th>性别</th>\n",
       "      <th>脑出血前mRS评分</th>\n",
       "      <th>高血压病史</th>\n",
       "      <th>卒中病史</th>\n",
       "      <th>糖尿病史</th>\n",
       "      <th>...</th>\n",
       "      <th>NCCT_original_firstorder_Median_y</th>\n",
       "      <th>NCCT_original_firstorder_Minimum_y</th>\n",
       "      <th>NCCT_original_firstorder_Range_y</th>\n",
       "      <th>NCCT_original_firstorder_RobustMeanAbsoluteDeviation_y</th>\n",
       "      <th>NCCT_original_firstorder_RootMeanSquared_y</th>\n",
       "      <th>NCCT_original_firstorder_Skewness_y</th>\n",
       "      <th>NCCT_original_firstorder_Uniformity_y</th>\n",
       "      <th>NCCT_original_firstorder_Variance_y</th>\n",
       "      <th>高压</th>\n",
       "      <th>低压</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>sub001</td>\n",
       "      <td>4.0</td>\n",
       "      <td>训练</td>\n",
       "      <td>20161212002136</td>\n",
       "      <td>43</td>\n",
       "      <td>女</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>54.277975</td>\n",
       "      <td>2.630186</td>\n",
       "      <td>88.333203</td>\n",
       "      <td>5.421960</td>\n",
       "      <td>54.182743</td>\n",
       "      <td>-0.680312</td>\n",
       "      <td>0.156607</td>\n",
       "      <td>116.141179</td>\n",
       "      <td>180</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>sub002</td>\n",
       "      <td>0.0</td>\n",
       "      <td>训练</td>\n",
       "      <td>20160406002131</td>\n",
       "      <td>58</td>\n",
       "      <td>男</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>37.518627</td>\n",
       "      <td>11.670399</td>\n",
       "      <td>54.126259</td>\n",
       "      <td>3.986374</td>\n",
       "      <td>37.958520</td>\n",
       "      <td>-0.176969</td>\n",
       "      <td>0.132046</td>\n",
       "      <td>53.076248</td>\n",
       "      <td>199</td>\n",
       "      <td>120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>sub003</td>\n",
       "      <td>5.0</td>\n",
       "      <td>训练</td>\n",
       "      <td>20160413000006</td>\n",
       "      <td>78</td>\n",
       "      <td>男</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>35.410096</td>\n",
       "      <td>-1.715890</td>\n",
       "      <td>70.139106</td>\n",
       "      <td>5.382112</td>\n",
       "      <td>36.444232</td>\n",
       "      <td>-0.057096</td>\n",
       "      <td>0.128060</td>\n",
       "      <td>91.740851</td>\n",
       "      <td>199</td>\n",
       "      <td>120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>sub004</td>\n",
       "      <td>4.0</td>\n",
       "      <td>训练</td>\n",
       "      <td>20161215001667</td>\n",
       "      <td>70</td>\n",
       "      <td>男</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>30.926762</td>\n",
       "      <td>-20.991359</td>\n",
       "      <td>92.048538</td>\n",
       "      <td>6.191501</td>\n",
       "      <td>32.204819</td>\n",
       "      <td>-0.297133</td>\n",
       "      <td>0.145292</td>\n",
       "      <td>125.651157</td>\n",
       "      <td>186</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>sub005</td>\n",
       "      <td>3.0</td>\n",
       "      <td>训练</td>\n",
       "      <td>20161222000978</td>\n",
       "      <td>51</td>\n",
       "      <td>男</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>46.492200</td>\n",
       "      <td>-9.889141</td>\n",
       "      <td>96.821808</td>\n",
       "      <td>6.482624</td>\n",
       "      <td>47.428035</td>\n",
       "      <td>-0.227378</td>\n",
       "      <td>0.146488</td>\n",
       "      <td>134.720581</td>\n",
       "      <td>135</td>\n",
       "      <td>92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>155</th>\n",
       "      <td>sub156</td>\n",
       "      <td>NaN</td>\n",
       "      <td>测试2</td>\n",
       "      <td>20200306000927</td>\n",
       "      <td>87</td>\n",
       "      <td>女</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>31.471558</td>\n",
       "      <td>-19.978141</td>\n",
       "      <td>111.297179</td>\n",
       "      <td>7.310788</td>\n",
       "      <td>33.601480</td>\n",
       "      <td>-0.068704</td>\n",
       "      <td>0.151482</td>\n",
       "      <td>169.061898</td>\n",
       "      <td>202</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>156</th>\n",
       "      <td>sub157</td>\n",
       "      <td>NaN</td>\n",
       "      <td>测试2</td>\n",
       "      <td>20201009003102</td>\n",
       "      <td>52</td>\n",
       "      <td>男</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>48.533011</td>\n",
       "      <td>30.460776</td>\n",
       "      <td>29.788433</td>\n",
       "      <td>2.260258</td>\n",
       "      <td>48.602582</td>\n",
       "      <td>-0.239727</td>\n",
       "      <td>0.125220</td>\n",
       "      <td>24.684739</td>\n",
       "      <td>180</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>157</th>\n",
       "      <td>sub158</td>\n",
       "      <td>NaN</td>\n",
       "      <td>测试2</td>\n",
       "      <td>20200410001952</td>\n",
       "      <td>57</td>\n",
       "      <td>男</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>53.345998</td>\n",
       "      <td>20.456862</td>\n",
       "      <td>65.707611</td>\n",
       "      <td>4.718427</td>\n",
       "      <td>53.690717</td>\n",
       "      <td>-0.079589</td>\n",
       "      <td>0.136352</td>\n",
       "      <td>73.002568</td>\n",
       "      <td>233</td>\n",
       "      <td>135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>158</th>\n",
       "      <td>sub159</td>\n",
       "      <td>NaN</td>\n",
       "      <td>测试2</td>\n",
       "      <td>20200218000582</td>\n",
       "      <td>47</td>\n",
       "      <td>男</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>37.092745</td>\n",
       "      <td>-5.551496</td>\n",
       "      <td>72.713624</td>\n",
       "      <td>5.802628</td>\n",
       "      <td>38.494810</td>\n",
       "      <td>-0.202350</td>\n",
       "      <td>0.120587</td>\n",
       "      <td>114.191236</td>\n",
       "      <td>183</td>\n",
       "      <td>122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>159</th>\n",
       "      <td>sub160</td>\n",
       "      <td>NaN</td>\n",
       "      <td>测试2</td>\n",
       "      <td>20200821002584</td>\n",
       "      <td>80</td>\n",
       "      <td>女</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>44.975763</td>\n",
       "      <td>7.592122</td>\n",
       "      <td>71.920477</td>\n",
       "      <td>5.207770</td>\n",
       "      <td>45.857394</td>\n",
       "      <td>-0.149273</td>\n",
       "      <td>0.132971</td>\n",
       "      <td>92.463632</td>\n",
       "      <td>208</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>160 rows × 108 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         ID  90天mRS 数据集划分     入院首次影像检查流水号  年龄 性别  脑出血前mRS评分  高血压病史  卒中病史  \\\n",
       "0    sub001     4.0    训练  20161212002136  43  女          0      0     0   \n",
       "1    sub002     0.0    训练  20160406002131  58  男          0      1     0   \n",
       "2    sub003     5.0    训练  20160413000006  78  男          0      1     0   \n",
       "4    sub004     4.0    训练  20161215001667  70  男          2      1     1   \n",
       "6    sub005     3.0    训练  20161222000978  51  男          0      0     0   \n",
       "..      ...     ...   ...             ...  .. ..        ...    ...   ...   \n",
       "155  sub156     NaN   测试2  20200306000927  87  女          0      1     0   \n",
       "156  sub157     NaN   测试2  20201009003102  52  男          2      1     1   \n",
       "157  sub158     NaN   测试2  20200410001952  57  男          0      1     0   \n",
       "158  sub159     NaN   测试2  20200218000582  47  男          0      1     0   \n",
       "159  sub160     NaN   测试2  20200821002584  80  女          0      1     0   \n",
       "\n",
       "     糖尿病史  ...  NCCT_original_firstorder_Median_y  \\\n",
       "0       0  ...                          54.277975   \n",
       "1       0  ...                          37.518627   \n",
       "2       0  ...                          35.410096   \n",
       "4       0  ...                          30.926762   \n",
       "6       0  ...                          46.492200   \n",
       "..    ...  ...                                ...   \n",
       "155     0  ...                          31.471558   \n",
       "156     0  ...                          48.533011   \n",
       "157     0  ...                          53.345998   \n",
       "158     0  ...                          37.092745   \n",
       "159     1  ...                          44.975763   \n",
       "\n",
       "     NCCT_original_firstorder_Minimum_y  NCCT_original_firstorder_Range_y  \\\n",
       "0                              2.630186                         88.333203   \n",
       "1                             11.670399                         54.126259   \n",
       "2                             -1.715890                         70.139106   \n",
       "4                            -20.991359                         92.048538   \n",
       "6                             -9.889141                         96.821808   \n",
       "..                                  ...                               ...   \n",
       "155                          -19.978141                        111.297179   \n",
       "156                           30.460776                         29.788433   \n",
       "157                           20.456862                         65.707611   \n",
       "158                           -5.551496                         72.713624   \n",
       "159                            7.592122                         71.920477   \n",
       "\n",
       "     NCCT_original_firstorder_RobustMeanAbsoluteDeviation_y  \\\n",
       "0                                             5.421960        \n",
       "1                                             3.986374        \n",
       "2                                             5.382112        \n",
       "4                                             6.191501        \n",
       "6                                             6.482624        \n",
       "..                                                 ...        \n",
       "155                                           7.310788        \n",
       "156                                           2.260258        \n",
       "157                                           4.718427        \n",
       "158                                           5.802628        \n",
       "159                                           5.207770        \n",
       "\n",
       "     NCCT_original_firstorder_RootMeanSquared_y  \\\n",
       "0                                     54.182743   \n",
       "1                                     37.958520   \n",
       "2                                     36.444232   \n",
       "4                                     32.204819   \n",
       "6                                     47.428035   \n",
       "..                                          ...   \n",
       "155                                   33.601480   \n",
       "156                                   48.602582   \n",
       "157                                   53.690717   \n",
       "158                                   38.494810   \n",
       "159                                   45.857394   \n",
       "\n",
       "     NCCT_original_firstorder_Skewness_y  \\\n",
       "0                              -0.680312   \n",
       "1                              -0.176969   \n",
       "2                              -0.057096   \n",
       "4                              -0.297133   \n",
       "6                              -0.227378   \n",
       "..                                   ...   \n",
       "155                            -0.068704   \n",
       "156                            -0.239727   \n",
       "157                            -0.079589   \n",
       "158                            -0.202350   \n",
       "159                            -0.149273   \n",
       "\n",
       "     NCCT_original_firstorder_Uniformity_y  \\\n",
       "0                                 0.156607   \n",
       "1                                 0.132046   \n",
       "2                                 0.128060   \n",
       "4                                 0.145292   \n",
       "6                                 0.146488   \n",
       "..                                     ...   \n",
       "155                               0.151482   \n",
       "156                               0.125220   \n",
       "157                               0.136352   \n",
       "158                               0.120587   \n",
       "159                               0.132971   \n",
       "\n",
       "     NCCT_original_firstorder_Variance_y   高压   低压  \n",
       "0                             116.141179  180   90  \n",
       "1                              53.076248  199  120  \n",
       "2                              91.740851  199  120  \n",
       "4                             125.651157  186   99  \n",
       "6                             134.720581  135   92  \n",
       "..                                   ...  ...  ...  \n",
       "155                           169.061898  202  100  \n",
       "156                            24.684739  180  110  \n",
       "157                            73.002568  233  135  \n",
       "158                           114.191236  183  122  \n",
       "159                            92.463632  208   95  \n",
       "\n",
       "[160 rows x 108 columns]"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "concat_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
